{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "be2a2a4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inference\n",
    "import os\n",
    "import sys\n",
    "import math\n",
    "import json\n",
    "import torch\n",
    "import argparse\n",
    "import textwrap\n",
    "import transformers\n",
    "from peft import PeftModel\n",
    "from transformers import GenerationConfig, TextStreamer\n",
    "from llama_attn_replace import replace_llama_attn\n",
    "from tqdm.notebook import tqdm\n",
    "import time\n",
    "\n",
    "PROMPT_DICT = {\n",
    "    \"prompt_no_input\": (\n",
    "        \"Below is an instruction that describes a task. \"\n",
    "        \"Write a response that appropriately completes the request.\\n\\n\"\n",
    "        \"### Instruction:\\n{instruction}\\n\\n### Response:\"\n",
    "    ),\n",
    "    \"prompt_no_input_llama2\": (\n",
    "        \"[INST] <<SYS>>\\n\"\n",
    "        \"You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\n\"\n",
    "        \"If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\\n\"\n",
    "        \"<</SYS>> \\n\\n {instruction} [/INST]\"\n",
    "    ),\n",
    "    \"prompt_llama2\": \"[INST]{instruction}[/INST]\"\n",
    "}\n",
    "\n",
    "def parse_config():\n",
    "    parser = argparse.ArgumentParser(description='arg parser')\n",
    "    parser.add_argument('--file_path', type=str, default=\"\")\n",
    "    parser.add_argument('--base_model', type=str, default=\"/data1/pretrained-models/llama-7b-hf\")\n",
    "    parser.add_argument('--cache_dir', type=str, default=\"./cache\")\n",
    "    parser.add_argument('--context_size', type=int, default=-1, help='context size during fine-tuning')\n",
    "    parser.add_argument('--flash_attn', type=bool, default=False, help='')\n",
    "    parser.add_argument('--temperature', type=float, default=0.6, help='')\n",
    "    parser.add_argument('--top_p', type=float, default=0.9, help='')\n",
    "    parser.add_argument('--max_gen_len', type=int, default=512, help='')\n",
    "    args = parser.parse_args()\n",
    "    return args\n",
    "\n",
    "def read_json_file(file_path):\n",
    "    with open(file_path, 'r') as file:\n",
    "        datas = json.load(file) \n",
    "    return datas\n",
    "    \n",
    "\n",
    "def build_generator(\n",
    "    model, tokenizer, temperature=0.6, top_p=0.9, max_gen_len=4096, use_cache=True\n",
    "):\n",
    "    def response(prompt):\n",
    "        inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
    "        # streamer is for directly visualizing generated result through output, if you just want to inference one result and see it directly, please uncomment the following line\n",
    "        # streamer = TextStreamer(tokenizer)\n",
    "        output = model.generate(\n",
    "            **inputs,\n",
    "            max_new_tokens=max_gen_len,\n",
    "            temperature=temperature,\n",
    "            top_p=top_p,\n",
    "            use_cache=use_cache,\n",
    "#             streamer=streamer,\n",
    "        )\n",
    "        \n",
    "        out = tokenizer.decode(output[0], skip_special_tokens=True)\n",
    "        out = out.split(prompt.lstrip(\"<s>\"))[1].strip()\n",
    "        return out\n",
    "    \n",
    "    input_ids = tokenizer(sent, return_tensors=\"pt\").input_ids.to(\"cuda\")\n",
    "\n",
    "    return response\n",
    "\n",
    "# format input to make it align with the format of trainig data (input part)\n",
    "def input_format(data):\n",
    "    doi = data['doi']\n",
    "    paper = data['txt']\n",
    "    keywords = ','.join(data['keywords'])\n",
    "    prompt = f\"Attached is a detailed scientific paper.\\n\\n{paper}\\n\\nYour task is to formulate 10 sophisticated Q&A pairs that delve into the underlying scientific principles and knowledge presented in this paper, focusing specifically on {keywords}. Steer clear of questions that are purely section-specific (e.g., 'What does Figure 5 represent?') or basic or definitional questions (e.g., 'What is XXX?'). Instead, focus on questions that require a deeper understanding of the subject matter, especially those relating to complex chemical compounds (like Al2O3, C2H5OH, TNT). Ensure diversity in your Q&A pairs, avoiding any duplication. Answers should be rich in detail, drawing on specific data, chemical properties, and contextual insights from the paper. Strive for clarity and depth in your responses, aiming to enhance the reader's comprehension of the intricate concepts discussed.\"\n",
    "    \n",
    "    return prompt\n",
    "\n",
    "def main(args):\n",
    "    if args.flash_attn:\n",
    "        replace_llama_attn(inference=True)\n",
    "\n",
    "    # Set RoPE scaling factor\n",
    "    config = transformers.AutoConfig.from_pretrained(\n",
    "        args.base_model,\n",
    "        cache_dir=args.cache_dir,\n",
    "    )\n",
    "\n",
    "    orig_ctx_len = getattr(config, \"max_position_embeddings\", None)\n",
    "    if orig_ctx_len and args.context_size > orig_ctx_len:\n",
    "        scaling_factor = float(math.ceil(args.context_size / orig_ctx_len))\n",
    "        config.rope_scaling = {\"type\": \"linear\", \"factor\": scaling_factor}\n",
    "\n",
    "    # Load model and tokenizer\n",
    "    model = transformers.AutoModelForCausalLM.from_pretrained(\n",
    "        args.base_model,\n",
    "        config=config,\n",
    "        cache_dir=args.cache_dir,\n",
    "        torch_dtype=torch.float16,\n",
    "        device_map=\"auto\",\n",
    "    )\n",
    "    model.resize_token_embeddings(32001)\n",
    "\n",
    "    tokenizer = transformers.AutoTokenizer.from_pretrained(\n",
    "        args.base_model,\n",
    "        cache_dir=args.cache_dir,\n",
    "        model_max_length=args.context_size if args.context_size > orig_ctx_len else orig_ctx_len,\n",
    "        padding_side=\"right\",\n",
    "        use_fast=False,\n",
    "    )\n",
    "\n",
    "    if torch.__version__ >= \"2\" and sys.platform != \"win32\":\n",
    "        model = torch.compile(model)\n",
    "    model.eval()\n",
    "\n",
    "    respond = build_generator(model, tokenizer, temperature=args.temperature, top_p=args.top_p,\n",
    "                              max_gen_len=args.max_gen_len, use_cache=True)\n",
    "\n",
    "    datas = read_json_file(args.file_path)\n",
    "    prompt_no_input = PROMPT_DICT[\"prompt_llama2\"]\n",
    "    \n",
    "    s = time.time()\n",
    "    res = []\n",
    "    instruction = \"As a material science expert, utilize your expertise to analyze the provided scientific paper.\"\n",
    "    for idx, data in enumerate(datas):\n",
    "        prompt = prompt_no_input.format_map({\"instruction\": instruction + \"\\n\" + input_format(data)})\n",
    "        output = respond(prompt=prompt)\n",
    "        res.append({'doi': data['doi'], 'input': prompt, 'output': output})\n",
    "        if idx%10 == 0:\n",
    "            e = time.time()\n",
    "            print(f'{idx}: {e-s}')\n",
    "            s = time.time()\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "b9a9a49d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0: 36.78651976585388\n",
      "10: 373.960896730423\n",
      "20: 351.62992668151855\n",
      "30: 349.70090532302856\n",
      "40: 343.12568521499634\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Token indices sequence length is longer than the specified maximum sequence length for this model (15416 > 12888). Running this sequence through the model will result in indexing errors\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "50: 389.2591211795807\n",
      "60: 361.09788727760315\n",
      "70: 360.734482049942\n",
      "80: 1432.6345703601837\n",
      "90: 351.71143865585327\n",
      "100: 344.2150914669037\n",
      "110: 349.33943700790405\n",
      "120: 366.43300914764404\n",
      "130: 326.4101474285126\n",
      "140: 333.6437246799469\n",
      "150: 328.5180959701538\n",
      "160: 339.9255440235138\n",
      "170: 385.0532178878784\n",
      "180: 322.3320896625519\n",
      "190: 363.4489061832428\n"
     ]
    }
   ],
   "source": [
    "import argparse\n",
    "args_dict = {'file_path': '/fl/data/acl/original/acl_eval_update.json', \n",
    "             'base_model': '/fl/model/vicuna/llama2-7b-12k', \n",
    "             'cache_dir': './cache', \n",
    "             'context_size': 12888, \n",
    "             'flash_attn': True, \n",
    "             'temperature': 0.6, \n",
    "             'top_p': 0.9, \n",
    "             'max_gen_len': 12888}\n",
    "args = argparse.Namespace(**args_dict)\n",
    "\n",
    "res = main(args)\n",
    "res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "ad648bc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# record orginal inference results\n",
    "file_path = '/fl/data/acl/original/acl_eval_result.json'\n",
    "with open(file_path, 'w') as file:\n",
    "    json.dump(res, file, indent=4)  "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "14ebbad4-faa1-4e54-a35e-d2b2dd734aea",
   "metadata": {},
   "source": [
    "### Process original inference results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5140ec7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(dict_keys(['doi', 'input', 'output']), 195)"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "def read(file_path):\n",
    "    with open(file_path, 'r') as file:\n",
    "        datas = json.load(file) \n",
    "    return datas\n",
    "\n",
    "res = read('/fl/data/acl/original/acl_eval_result.json')\n",
    "res[0].keys(), len(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "d9f610b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "pattern = r\"^\\d+\\.\\s\"\n",
    "\n",
    "def process_qa(qa):\n",
    "    try:\n",
    "        clean_qa = re.sub(pattern, \"\", qa).split('\\nA:')\n",
    "        return {'Q': clean_qa[0].strip(), 'A': f'A: {clean_qa[1].strip()}'}\n",
    "    except:\n",
    "        return None\n",
    "\n",
    "def deal_qa(qas):\n",
    "    qa_sep = list(map(process_qa, qas.split('\\n\\n')))\n",
    "    \n",
    "    q_set = set()\n",
    "    qa_unique = []    # keep unique Q&As only\n",
    "    for qa in qa_sep:\n",
    "        if qa != None:\n",
    "            q = qa['Q']\n",
    "            if q not in q_set:\n",
    "                q_set.add(q)\n",
    "                qa_unique.append(qa)\n",
    "    return qa_unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "cbc68b55",
   "metadata": {},
   "outputs": [],
   "source": [
    "info = read('/fl/data/acl/original/acl_eval_update.json')\n",
    "res_doi = {i['doi']: i for i in res}\n",
    "\n",
    "final = []\n",
    "for i in info:\n",
    "    data = res_doi[i['doi']]\n",
    "    qas = data['output']\n",
    "    qa_list = deal_qa(qas)\n",
    "    num_qa = len(qa_list)\n",
    "    \n",
    "    i['num_Q&A'] = num_qa\n",
    "    i['Q&A'] = qa_list\n",
    "    i['raw_output'] = qas\n",
    "    final.append(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "60d668c0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'source': 'C:\\\\Users\\\\yixliu1\\\\Desktop\\\\txt_output_three_filtered_rep_skipped_240103\\\\Chemistry, Analytical\\\\10.1038-nmat1035.txt',\n",
       " 'doi': '10.1038/nmat1035',\n",
       " 'abstract': \"Molecular-dynamics simulations have recently been used to elucidate the transition with decreasing grain size from a dislocation-based to a grain-boundary-based deformation mechanism in nanocrystalline f.c.c. metals. This transition in the deformation mechanism results in a maximum yield strength at a grain size (the 'strongest size') that depends strongly on the stacking-fault energy, the elastic properties of the metal, and the magnitude of the applied stress. Here, by exploring the role of the stacking-fault energy in this crossover, we elucidate how the size of the extended dislocations nucleated from the grain boundaries affects the mechanical behaviour. Building on the fundamental physics of deformation as exposed by these simulations, we propose a two-dimensional stress-grain size deformation-mechanism map for the mechanical behaviour of nanocrystalline f.c.c. metals at low temperature. The map captures this transition in both the deformation mechanism and the related mechanical behaviour with decreasing grain size, as well as its dependence on the stacking-fault energy, the elastic properties of the material, and the applied stress level.\",\n",
       " 'keywords': ['nanocrystalline metals',\n",
       "  'deformation-mechanism map',\n",
       "  'molecular-dynamics simulation',\n",
       "  'grain size',\n",
       "  'grain-boundary-based deformation mechanism',\n",
       "  'metals',\n",
       "  'simulation',\n",
       "  'dislocations',\n",
       "  'stress',\n",
       "  'materials',\n",
       "  'prevalent deformation maps',\n",
       "  'grain boundary',\n",
       "  'operative deformation mechanisms',\n",
       "  'deformation',\n",
       "  'deformation behaviour',\n",
       "  'straight lines',\n",
       "  'gbs',\n",
       "  'processes',\n",
       "  'atomic level',\n",
       "  'nucleation'],\n",
       " 'txt': \"The idea of representing the deformation behaviour of materials in a two-dimensional-map format was first put forward in 1965 1. The basic principle of such representations is to use the best available constitutive equations 2,3 that describe the operative deformation mechanisms for dividing the deformation-parameter space into individual regions within which a single mechanism is rate controlling 4,5. The deformation-parameter space usually consists of two of the three normalized parameters: stress M, and grain size, M the melting temperature, 2, anticipates low-temperature superplasticity 6 and high strain-rate superplasticity 7,8. Both phenomena have now been experimentally observed in nanocrystalline materials 9,10 (that is, polycrystals with a grain size of less than 100 nm). This success implies that the deformation-map technique can possibly be extended to nanocrystalline materials. However, such an extension is not straightforward because the mechanical behaviour of these materials is still controversial 11, the observations ranging from greatly enhanced ductility 12,13,14,15 to dramatically increased yield strength 16,17,18,19. The prevalent deformation maps apparently cannot resolve this controversy, because they are based on empirical relations, rather than on fundamental physical principles. Here we construct a deformation-mechanism map based on fundamental physical processes in the deformation of nanocrystalline f.c.c. metals. The knowledge for these processes comes from modelling the nanocrystalline microstructure at the atomic level by using molecular-dynamics (MD) simulation. This is in contrast to the prevalent approach, where the deformation maps are built on phenomenological constitutive equations, which result from assumptions on certain deformation mechanisms derived from empirical data. The power of our approach is that it integrates the most important physics associated with deformation phenomena, in a self-consistent description of the effects of stress and grain size on the strain rate in nanocrystalline structures, thereby avoiding the empiricism of the prevalent approach. Moreover, ultimately this approach can test the basic assumptions underlying the phenomenological equations, assess the possible relevance of previously proposed mechanisms, and be of use to discover new deformation mechanisms not yet suggested in the literature and incorporate them in the deformation maps. Our deformation map is based on the physics of nucleation and propagation of dislocations in nanocrystalline f.c.c. metals at room temperature, as exposed by MD simulation. Simulations for idealized, fully three-dimensional (3D) nanocrystalline-Al microstructures with a grain size of up to 32 nm provided a detailed understanding of (i) the processes of dislocation nucleation and glide in the nanograins and (ii) the crossover with decreasing grain size in the deformation mechanism and in the related mechanical behaviour, from dislocation slip to a grain-boundary-based deformation process 20. Building on these insights, here we use the same simulation model 20 to investigate the effect of the stacking-fault energy (SFE) on the deformation behaviour. Our comparison of a low-SFE f.c.c. metal with the behaviour of Al, a high-SFE metal, provides novel insights into the structure and dynamics of the nucleated dislocations and their effect on the 'strongest size' 19 at which the crossover from a dislocation to a grain boundary (GB)-based mechanism occurs. These atomic-level insights enable us to subsequently construct our deformation map. Simulations of fully 3D nanocrystalline-Al microstructures 20 revealed the nucleation of complete 1/2[110] dislocations from the GBs and triple junctions, provided the grain size 20. As The crossover in the deformation mechanism arises from the length-scale competition between the grain size and the dislocation splitting distance, 24 where 0 = 1 2/ σ ∞( 2 1 and 2 depend on the elastic moduli of the material and the particular types of the two Shockley partials 24. An important aspect of the interplay between dislocation-dominated and GB-mediated deformation processes, characterized by the competition between the two length scales defined by Two identical microstructures with a grain size of 24 nm of the two potentials representing low and high SFE materials were loaded under uniform tensile stress slightly higher than the dislocation-nucleation threshold stress for these potentials (see To ensure similar deformation conditions, each system was loaded under uniform tensile stress approximately 0.1 GPa above its respective dislocation–nucleation threshold stress for this grain size. −2) and −2. Two distinct types of dislocation configurations are labelled as (1), indicating a complete, extended 1/2[110] dislocation and (2), indicating a stacking-fault plane produced by a single Shockley-partial dislocation emitted from a GB. The time-dependent plastic strains obtained for the two model materials after subtracting the corresponding elastic strains are compared in The deformation conditions are as for pl = el is obtained by subtracting the elastic strain, el, from the total strain, We interpret these distinct behaviours by observing that up to about 0.4% plastic strain, both systems nucleate only partial dislocations which propagate, with about equal velocity, into the grain interiors, giving rise to rather similar strain rates. (As the stress applied to each system, lying ∼0.1 GPa above the respective nucleation-threshold stress, was chosen so as to create similar deformation conditions, these rather similar initial strain rates are to be expected.) At this point, the two curves in This situation in which −3– −2 grain-size dependence of the competing GB-based deformation process (namely GB-diffusion creep) 20,25. Whereas in the high-SFE metal, the small grain size enables the GB-mediated deformation to dominate, the larger grain size in the low-SFE metal favours the partial dislocation slip. By contrast with our previous simulations of dislocation processes in columnar microstructures 26,27, in which the dislocations were constrained to be straight lines, here the dislocation lines are curved and they terminate on the GBs 20. Cheng 28 suggested that this introduces a grain-size dependence into the nucleation threshold stress. They proposed the existence of a GB dislocation source of the Frank–Read type, with a characteristic size of the order of the average distance between the triple lines. Further, assuming an inverse proportionality between the loop radius and the stress acting on the loop, they suggested 28 that the size of the loops nucleated from the GBs is proportional to the grain size, that is, that the nucleation stress is inversely proportional to the grain size, n ∼ 1/ 29 as nucleation of dislocations from GB ledges and their propagation through the forest dislocations commonly present in coarse-grained materials. This mechanism gives a Hall–Petch type of nucleation stress, n = 0 + 0 is the single-crystal yield stress and 29. Combining the two mechanisms one gets: These insights into the dislocation-nucleation mechanism and the structure of the dislocations in nanocrystalline f.c.c. metals can be captured in a deformation-mechanism map. The underlying physics comes from the length-scale competition between the grain size, σ/ σ ∞, and reduced inverse grain size, 0/ 0 and σ ∞ are therefore material-defining parameters, in contrast to the use of 1,2,3,4,5. The map shows three distinct regions in which either complete extended dislocations (Region I) or partial dislocations (Region II), or no dislocations at all (Region III) exist during the low-temperature deformation of nanocrystalline f.c.c. metals. The map is expressed in reduced units of stress ( ∞) and inverse grain size ( 0/ ∞ and 0 are functions of the stacking-fault energy and the elastic properties of the material. The variables in this map (see σ/ σ ∞ ≤ 1 and 0 ≤ 0/ σ = σ ∞ defines the upper stress limit, the line at 0 defines a lower grain-size limit of stability for the grains. At σ = σ ∞, the stacking-fault splitting distance becomes infinite. This means that an entire (111) plane can shear in the [112] direction, and the crystal structure becomes mechanically unstable. The limit for 0/ 0 would be unstable even at zero stress, and the structure would approach the amorphous limit. For the Al potential used here 30, 0 ≈ 1 nm (refs Within these limits, the map is transected by two straight diagonal lines ( 0 and σ ∞: for pure Al the fit to the Hall–Petch slope gives 32 0 = 5.91 × 10 −3 GPa and −5 m 1/2 GPa; by comparison, σ ∞ calculated for the case of a dislocation dissociated into a screw and an edge partial dislocations 24 gives a value of σ ∞ ≈ 1.5 GPa. (It should be noted that this value is lower than the theoretical shear strength, r = 2.84 GPa, calculated in ref. Because 0 and σ ∞, in practice the coarse-grained regime corresponding to equation (2a) will not show up on the scale of n ∼ 1/ 0, σ n = σ ∞; this then gives a slope of 1. As below this line no dislocations are present at all, the splitting line terminates at the nucleation line. The map is thus divided into three regions. In Region I, characteristic of a large grain size and/or a high-SFE metal, slip deformation prevails as the grains are larger than In Region II, characteristic of a small grain size and/or a low-SFE metal, only incomplete dislocations can be nucleated; the grains are therefore transected by stacking faults that inhibit dislocation propagation, thus giving rise to strain hardening. The transition from Region I to Region II at the splitting line ( σ = σ ∞ with 0, as discussed above. Region III characterizes the very small grain-size or low-stress regime in which no dislocations are present at any stress, and the deformation is therefore controlled by GB-mediated processes. These processes involve GB diffusion creep, that is, coupled GB sliding 21,22,23 and GB diffusion 25, resulting in the inverse Hall–Petch behaviour of the nanocrystalline metal 20,34. According to this map, the 'strongest size' may arise from two different scenarios for high and low SFE or at low and high stress. For high-SFE metals, Region II involves very high stress and the crossover from normal to inverse Hall–Petch behaviour should be governed mainly by the transition from perfect slip to GB-mediated deformation 20, that is, the 1/ 26. Although the above map represents an attempt to extrapolate the atomic mechanisms observed in MD simulations to a deformation regime that is accessible experimentally and in real materials, it is important to be aware of the inherent limitations of the MD approach. Apart from being restricted to relatively small model systems, by their very nature, MD simulations are limited to very high stresses and, hence, strain rates (of typically >10 7 s −1, corresponding to 1% strain in 1 ns). The stresses applied in the present simulations, although high, are not entirely unusual in experimental studies of nanocrystalline materials. In fact, experiments 5 on nanocrystalline Ni 3Al show dislocation nucleation and propagation for a grain size of 50 nm at a stress of 1.5GPa, that is, very similar to our simulation conditions. The transition from partial to perfect slip in nanocrystalline Al has also been observed experimentally 35. The issue of the high strain rates in MD simulations was recently also addressed in high-temperature deformation simulations 25 of nanocrystalline Pd. In spite of the extremely high strain rates (of >10 7 s −1), these simulations quantitatively validated the Coble-creep equation 36 describing GB diffusion creep in coarse-grained materials. However, as discussed in detail in ref. In summary, our study reveals how the crossover with decreasing grain size from dislocation-driven to GB-mediated deformation depends on the stacking-fault energy, the elastic properties of the material, and the magnitude of the applied stress. These insights suggest a novel deformation-mechanism map for nanocrystalline f.c.c. metals, which captures this crossover in both the prevailing deformation mechanism and the mechanical behaviour. This map provides a bridge between the fundamental physics of plastic deformation as exposed by MD simulation, and experiments towards deconvoluting the deformation behaviour of nanocrystalline materials. Our simulations were performed on idealized, fully dense and impurity-free model microstructures consisting of four grains of identical size and shape in the periodically repeated simulation cell. These input structures were prepared using a Voronoi construction with the grain centres placed on a 3D periodic f.c.c. lattice. The resulting grains have the shape of the f.c.c. Wigner–Seitz cell—a rhombic dodecahedron with 12 faces. By choosing random grain orientations and applying a Monte-Carlo procedure to avoid low-angle, vicinal and 'special' high-angle misorientations, we ensure that all the GBs in the system are general, high-energy GBs with a highly disordered atomic structure. Owing to the large number of faces per grain (12), the system forms a rather large number of GBs per unit volume (for a total of 24 distinct GBs in the simulation cell). The GBs meet at the triple lines at 120° angles, that is, close to the equilibrium angles; this ensures a high degree of stability against grain growth. Two interatomic potentials representing low and high SFE metals were applied. As representative for a low-SFE metal we consider a hypothetical material, which we name 'low-SFE Pd', described 37 by the EAM potential for Pd. This potential was fitted to bulk properties of Pd and yields a very low SFE of 8 mJ m −2, compared with the experimental value 38 of ∼180 mJ m −2. By contrast, with a SFE of 122 mJ m −2, the Ercolessi and Adams potential 30 for Al is representative for a high-SFE metal. We use the Parrinello–Rahman constant-load technique 39 combined with the Nose–Hoover thermostat 40 to achieve constant stress–constant temperature dynamics. All of our MD simulations were carried out at temperature 33 to be 2.84 GPa.) Throughout our simulations we use common-neighbour analysis (CNA) 41 to identify the stacking faults in our microstructures as double layers of atoms in h.c.p. surroundings. Atoms that are neither in perfect f.c.c. nor h.c.p. surroundings as defined by CNA are 'disordered atoms'; these are useful to identify the GBs and the dislocation cores. In the simulation snapshots shown in this paper, we use the following convention. Atoms in perfect f.c.c. surrounding are not shown; all atoms in h.c.p. surroundings are visualized in red; atoms that have more than a third of their neighbours (including first and second nearest neighbours) as disordered atoms are considered to be in a highly disordered environment and are visualized in blue. To best expose all the dislocation processes inside the grains while still keeping track of the grains themselves, we chose to visualize only a small part of each GB. This part is chosen with respect to a line of observation along one of the diagonals of the simulation cell. The GBs are then visualized as walls of grey atoms. (For more details, see ref. The authors declare no competing financial interests. The authors declare no competing financial interests. \",\n",
       " 'title': 'Deformation-mechanism map for nanocrystalline metals by molecular-dynamics simulation',\n",
       " 'num_Q&A': 10,\n",
       " 'Q&A': [{'Q': 'Q: What are the key factors that can affect the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: The key factors that can affect the deformation behaviour of nanocrystalline metals include the grain size, the grain-boundary-based deformation mechanism, the elastic constants, the plastic deformation rate, and the initial stress.'},\n",
       "  {'Q': 'Q: How does the grain size affect the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: As the grain size becomes smaller, the strength of the grain boundaries becomes larger, leading to a decrease in the deformation rate. Additionally, the presence of dislocations is affected by the grain size, with more dislocations observed in larger grains.'},\n",
       "  {'Q': 'Q: What is the main advantage of using molecular-dynamics simulation in studying the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: The main advantage of using molecular-dynamics simulation in studying the deformation behaviour of nanocrystalline metals is that it allows for the investigation of the internal structure and dynamics of the nucleated dislocations at the atomic level. This is in contrast to experimental techniques, which can only provide macroscopic observations.'},\n",
       "  {'Q': 'Q: How does the grain-boundary-based deformation mechanism affect the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: In the grain-boundary-based deformation mechanism, the strain is accommodated by the movement of the grain boundaries, which can result in the formation of dislocations if the boundaries are not flexible enough. Understanding the grain-boundary-based deformation mechanism is essential to predicting the deformation behaviour of nanocrystalline metals.'},\n",
       "  {'Q': 'Q: What are the three main deformation modes of nanocrystalline metals?',\n",
       "   'A': 'A: The three main deformation modes of nanocrystalline metals are: \\n   1. Grain-boundary-based deformation, \\n   2. Dislocation-based deformation, and \\n   3. Mixing-based deformation.'},\n",
       "  {'Q': 'Q: How does the elastic constant affect the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: The elastic constant affects the deformation behaviour of nanocrystalline metals by determining the strength of the grain boundaries and the plastic deformation rate. The elastic constant represents the sum of the interactions between the atoms in the system and is sensitive to the atomic arrangement in the crystal.'},\n",
       "  {'Q': 'Q: What is the role of stress in the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: Stress plays a crucial role in the deformation behaviour of nanocrystalline metals by causing the grain boundaries to deform and potentially leading to the nucleation of dislocations. The amount of stress required to nucleate dislocations depends on the elastic constant and the grain size.'},\n",
       "  {'Q': 'Q: What are the processes that can lead to the nucleation of dislocations in nanocrystalline metals?',\n",
       "   'A': 'A: The processes that can lead to the nucleation of dislocations in nanocrystalline metals include grain-boundary-based deformation, dislocation-based deformation, and mixing-based deformation.'},\n",
       "  {'Q': 'Q: How does the initial stress affect the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: The initial stress affects the deformation behaviour of nanocrystalline metals by determining the amount of stress present at the onset of deformation. The initial stress can be either externally applied or result from the interaction of the grain boundaries with each other.'},\n",
       "  {'Q': 'Q: What is the main reason for the prevalence of straight lines in the deformation behaviour of nanocrystalline metals?',\n",
       "   'A': 'A: The main reason for the prevalence of straight lines in the deformation behaviour of nanocrystalline metals is the presence of dislocations, which affect the overall deformation behaviour of the material. Dislocations tend to align along straight lines, leading to the observation of straight lines in the deformation behaviour of nanocrystalline metals.'}],\n",
       " 'raw_output': '1. Q: What are the key factors that can affect the deformation behaviour of nanocrystalline metals?\\nA:The key factors that can affect the deformation behaviour of nanocrystalline metals include the grain size, the grain-boundary-based deformation mechanism, the elastic constants, the plastic deformation rate, and the initial stress.\\n\\n2. Q: How does the grain size affect the deformation behaviour of nanocrystalline metals?\\nA:As the grain size becomes smaller, the strength of the grain boundaries becomes larger, leading to a decrease in the deformation rate. Additionally, the presence of dislocations is affected by the grain size, with more dislocations observed in larger grains.\\n\\n3. Q: What is the main advantage of using molecular-dynamics simulation in studying the deformation behaviour of nanocrystalline metals?\\nA:The main advantage of using molecular-dynamics simulation in studying the deformation behaviour of nanocrystalline metals is that it allows for the investigation of the internal structure and dynamics of the nucleated dislocations at the atomic level. This is in contrast to experimental techniques, which can only provide macroscopic observations.\\n\\n4. Q: How does the grain-boundary-based deformation mechanism affect the deformation behaviour of nanocrystalline metals?\\nA:In the grain-boundary-based deformation mechanism, the strain is accommodated by the movement of the grain boundaries, which can result in the formation of dislocations if the boundaries are not flexible enough. Understanding the grain-boundary-based deformation mechanism is essential to predicting the deformation behaviour of nanocrystalline metals.\\n\\n5. Q: What are the three main deformation modes of nanocrystalline metals?\\nA:The three main deformation modes of nanocrystalline metals are: \\n   1. Grain-boundary-based deformation, \\n   2. Dislocation-based deformation, and \\n   3. Mixing-based deformation.\\n\\n6. Q: How does the elastic constant affect the deformation behaviour of nanocrystalline metals?\\nA:The elastic constant affects the deformation behaviour of nanocrystalline metals by determining the strength of the grain boundaries and the plastic deformation rate. The elastic constant represents the sum of the interactions between the atoms in the system and is sensitive to the atomic arrangement in the crystal.\\n\\n7. Q: What is the role of stress in the deformation behaviour of nanocrystalline metals?\\nA:Stress plays a crucial role in the deformation behaviour of nanocrystalline metals by causing the grain boundaries to deform and potentially leading to the nucleation of dislocations. The amount of stress required to nucleate dislocations depends on the elastic constant and the grain size.\\n\\n8. Q: What are the processes that can lead to the nucleation of dislocations in nanocrystalline metals?\\nA:The processes that can lead to the nucleation of dislocations in nanocrystalline metals include grain-boundary-based deformation, dislocation-based deformation, and mixing-based deformation.\\n\\n9. Q: How does the initial stress affect the deformation behaviour of nanocrystalline metals?\\nA:The initial stress affects the deformation behaviour of nanocrystalline metals by determining the amount of stress present at the onset of deformation. The initial stress can be either externally applied or result from the interaction of the grain boundaries with each other.\\n\\n10. Q: What is the main reason for the prevalence of straight lines in the deformation behaviour of nanocrystalline metals?\\nA:The main reason for the prevalence of straight lines in the deformation behaviour of nanocrystalline metals is the presence of dislocations, which affect the overall deformation behaviour of the material. Dislocations tend to align along straight lines, leading to the observation of straight lines in the deformation behaviour of nanocrystalline metals.'}"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "final[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "51e3e4c7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "184"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "process = [i for i in final if i['num_Q&A']==10]\n",
    "len(process)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "cc0b96ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = '/fl/data/acl/acl_eval_final.json'\n",
    "with open(file_path, 'w') as file:\n",
    "    json.dump(process, file, indent=4)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46fadc70",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "longlra",
   "language": "python",
   "name": "longlra"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
